rvest package to get the URLs for the SASP forcing and SBSP_forcing meteorological datasets.#Read URL
site_url <- 'https://snowstudies.org/archived-data/'
webpage <- read_html(site_url)
#Extract only weblinks for SASP and SBSP forcing datasets
links <- webpage %>%
html_nodes('a') %>%
.[grepl('forcing',.)] %>%
html_attr('href')
download_file and str_split_fixed commands to download the data and save it in your data folder. You can use a for loop or a map function.#Grab only file name by splitting out on forward slashes
splits <- str_split_fixed(links,'/',8)
#Keep only 8th column
dataset <- splits[,8]
#Generate file list for where data goes
file_names <- paste0('data/',dataset)
#Download data in a map
map2(links[1:2],file_names[1:2],download.file)
#Given in assignment: this code grabs the variable names from the metadata pdf file
library(pdftools)
headers <- pdf_text('https://snowstudies.org/wp-content/uploads/2022/02/Serially-Complete-Metadata-text08.pdf') %>%
readr::read_lines(.) %>%
trimws(.) %>%
str_split_fixed(.,'\\.',2) %>%
.[,2] %>%
.[1:26] %>%
str_trim(side = "left")
#Function to read in data, re-name relevant columns, append a site column
met_reader <- function(file){
name = str_split_fixed(file,'_',2)[,2] %>%
gsub('_Forcing_Data.txt','',.)
df <- read.delim(file, sep="", col.names = headers) %>%
select(year, month, day, hour, precip = precip..kg.m.2.s.1., air_temp = air.temp..K.) %>%
mutate(site = name)
}
map function to read in both meteorological files. Display a summary of your tibble.#Read in and display both files
met_data <- map_dfr(file_names, met_reader)
summary(met_data)
## year month day hour
## Min. :2003 Min. : 1.000 Min. : 1.00 Min. : 0.00
## 1st Qu.:2005 1st Qu.: 3.000 1st Qu.: 8.00 1st Qu.: 5.75
## Median :2007 Median : 6.000 Median :16.00 Median :11.50
## Mean :2007 Mean : 6.472 Mean :15.76 Mean :11.50
## 3rd Qu.:2009 3rd Qu.: 9.000 3rd Qu.:23.00 3rd Qu.:17.25
## Max. :2011 Max. :12.000 Max. :31.00 Max. :23.00
## precip air_temp site
## Min. :0.000e+00 Min. :242.1 Length:138336
## 1st Qu.:0.000e+00 1st Qu.:265.8 Class :character
## Median :0.000e+00 Median :272.6 Mode :character
## Mean :3.838e-05 Mean :272.6
## 3rd Qu.:0.000e+00 3rd Qu.:279.7
## Max. :6.111e-03 Max. :295.8
air temp [K] variable). Is there anything suspicious in the plot? Adjust your filtering if needed.#Calculate mean temp by year and site
annual_temp <- met_data %>%
group_by(year,site) %>%
summarize(mean_temp = mean(`air_temp`))
#Plot mean temp by year and site
ggplot(annual_temp, aes(x=year,y=mean_temp,color=site)) +
geom_line() +
theme_few() +
scale_color_few() +
theme(legend.position = c(0.85,0.2)) +
xlab("Year") + ylab("Mean air temperature (K)")
The mean annual temperature for 2003 is much lower than other years. Checking the data shows incomplete annual data for 2003 (only Nov and Dec) and 2011 (missing Oct, Nov, and Dec). Those years will be filtered out below.
#Calculate mean temp by year and site (excluding 2003,2011)
annual_temp <- met_data %>%
filter(!(year %in% c(2003,2011))) %>%
group_by(year,site) %>%
summarize(mean_temp = mean(`air_temp`))
#Plot mean temp by year and site (excluding 2003,2011)
ggplot(annual_temp, aes(x=year,y=mean_temp,color=site)) +
geom_line() +
theme_few() +
scale_color_few() +
theme(legend.position = c(0.85,0.2)) +
xlab("Year") + ylab("Mean air temperature (K)")
No, monthly average air temperatures at SBSP are at no point warmer than SASP between 2005 and 2010.
#Function to make line plots of monthly mean temps (by site) for a given year
monthly_temp_f <- function(yr,df){
plot_df <- df %>%
filter(year == yr) %>%
group_by(month,site) %>%
summarize(mean_temp = mean(air_temp))
a <- ggplot(plot_df, aes(x=month,y=mean_temp,color=site)) +
geom_line() +
theme_few() +
scale_color_few() +
theme(legend.position = c(0.9,0.85)) +
xlab("Month") + ylab("Mean air temperature (K)") +
ggtitle(as.character(yr))
print(a)
}
#Loop from 2005-2010
for(yr in 2005:2010){
monthly_temp_f(yr, met_data)
}
Bonus #1: Make a plot of average daily precipitation by day of year (averaged across all available years).
#Create "day of year" column
met_data$date <- as.Date(with(met_data, paste(year, month, day, sep="/")), "%Y/%m/%d")
met_data$doy <- yday(met_data$date)
#Calculate mean daily precipitation by day of year
daily_mean_precip <- met_data %>%
group_by(doy,site) %>%
summarize(mean_precip = mean(precip)) %>%
mutate(daily_precip = mean_precip * 24)
#Plot mean daily precipitation by day of year
ggplot(daily_mean_precip, aes(x=doy,y=daily_precip,color=site)) +
geom_line() +
theme_few() +
scale_color_few() +
theme(legend.position = "none") +
xlab("Day of year") + ylab("Mean daily precipitation (kg/m2/s)")
Bonus #2: Use a function and for loop to create yearly plots of precipitation by day of year.
#Function to plot annual precipitation by day of year
daily_precip_f <- function(yr,df){
plot_df <- df %>%
filter(year == yr) %>%
group_by(doy,site) %>%
summarize(mean_precip = mean(precip)) %>%
mutate(daily_precip = mean_precip * 24)
a <- ggplot(plot_df, aes(x=doy,y=daily_precip,color=site)) +
geom_line() +
theme_few() +
scale_color_few() +
theme(legend.position = "none") +
xlab("Day of year") + ylab("Mean daily precipitation (kg/m2/s)") +
ggtitle(as.character(yr))
print(a)
}
#Loop from 2004-2010
for(yr in 2004:2010){
daily_precip_f(yr, met_data)
}